Background

For this project, I decided to recreate Hans Rosling’s bubble chart of life expectancy against GDP per capita, with bubble size showing population. The graph can be found here.

Since the video did not share the dataset, I downloaded another dataset from Our World in Data.

Data Overview and Wrangling

Although the data start in 1870, I decided to use only the years from 1950 onward, because most countries are missing population and GDP values for the years before 1950.

# Read the CSV export, clean the column names with janitor, drop every row
# that contains a missing value, and keep only the non-"World" rows from 1950
# onward.
le <- read.csv("data/life_expectancy_data.csv") %>%
  janitor::clean_names() %>%
  na.omit() %>%
  filter(entity != "World" & year >= 1950)

There are 166 countries in the dataset, but it lacks information about which continent each country belongs to.

I used this website as a reference for the continents and manually labeled each country.

# Number of distinct entities left after filtering.
nrow(distinct(le, entity))
## [1] 166
# Attach a `continent` column to `le` by matching each entity name against a
# hand-maintained list per continent. Entities that appear in none of the
# lists get NA, because case_when() has no fallback branch here.
#
# Changes from the earlier version of these lists:
#   * "Moldova" is listed under Europe instead of Asia.
#   * "Russia" appeared in none of the lists and is now listed under Europe.
#     (Harmless if the data contain no such entity: %in% simply never matches.)
le <- le %>%
  mutate(
    continent = case_when(
      entity %in% c(
        "Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Burundi",
        "Cameroon", "Cape Verde", "Central African Republic", "Chad",
        "Comoros", "Congo", "Cote d'Ivoire", "Democratic Republic of Congo",
        "Djibouti", "Egypt", "Equatorial Guinea", "Eswatini", "Ethiopia",
        "Gabon", "Gambia", "Ghana", "Guinea", "Guinea-Bissau", "Kenya",
        "Lesotho", "Liberia", "Libya", "Madagascar", "Malawi", "Mali",
        "Mauritania", "Mauritius", "Morocco", "Mozambique", "Namibia",
        "Niger", "Nigeria", "Rwanda", "Sao Tome and Principe", "Senegal",
        "Seychelles", "Sierra Leone", "South Africa", "Sudan", "Tanzania",
        "Togo", "Tunisia", "Uganda", "Zambia", "Zimbabwe"
      ) ~ "Africa",

      entity %in% c(
        "Barbados", "Canada", "Costa Rica", "Cuba", "Dominica",
        "Dominican Republic", "El Salvador", "Guatemala", "Haiti",
        "Honduras", "Jamaica", "Mexico", "Nicaragua", "Panama",
        "Puerto Rico", "Saint Lucia", "Trinidad and Tobago", "United States"
      ) ~ "North America",

      entity %in% c(
        "Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador",
        "Paraguay", "Peru", "Uruguay", "Venezuela"
      ) ~ "South America",

      entity %in% c(
        "Afghanistan", "Azerbaijan", "Bahrain", "Bangladesh", "Cambodia",
        "China", "Cyprus", "Georgia", "Hong Kong", "India", "Indonesia",
        "Iran", "Iraq", "Israel", "Japan", "Jordan", "Kazakhstan", "Kuwait",
        "Kyrgyzstan", "Laos", "Lebanon", "Malaysia", "Mongolia", "Myanmar",
        "Nepal", "North Korea", "Oman", "Pakistan", "Palestine",
        "Philippines", "Qatar", "Saudi Arabia", "Singapore", "South Korea",
        "Sri Lanka", "Syria", "Taiwan", "Tajikistan", "Thailand", "Turkey",
        "Turkmenistan", "United Arab Emirates", "Uzbekistan", "Vietnam",
        "Yemen"
      ) ~ "Asia",

      entity %in% c(
        "Albania", "Armenia", "Austria", "Belarus", "Belgium",
        "Bosnia and Herzegovina", "Bulgaria", "Croatia", "Czechia",
        "Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
        "Hungary", "Iceland", "Ireland", "Italy", "Latvia", "Lithuania",
        "Luxembourg", "Malta", "Moldova", "Montenegro", "Netherlands",
        "North Macedonia", "Norway", "Poland", "Portugal", "Romania",
        "Russia", "Serbia", "Slovakia", "Slovenia", "Spain", "Sweden",
        "Switzerland", "Ukraine", "United Kingdom"
      ) ~ "Europe",

      entity %in% c("Australia", "New Zealand") ~ "Oceania"
    )
  )

# Report any entity that matched none of the lists instead of letting it
# carry an NA continent into the plots unnoticed.
unlabelled_entities <- unique(le$entity[is.na(le$continent)])
if (length(unlabelled_entities) > 0) {
  warning(
    "No continent assigned to: ",
    paste(unlabelled_entities, collapse = ", "),
    call. = FALSE
  )
}

In hindsight, I should have built a country-to-continent lookup table and used left_join, which would have been more efficient…

Data Visualization

I used plotly and crosstalk to build the visualization. In my experience, crosstalk does not work well with animated plots.

# Wrap the labelled data in a crosstalk SharedData object so that the filter
# widget and the plot operate on the same rows.
shared_le <- SharedData$new(le)

# Layout: a continent filter in a 2-unit column, with the plot taking the
# remaining width. The plot maps GDP per capita (log10 x-axis) against life
# expectancy; `frame = year` and `ids = entity` are passed through to
# ggplotly() alongside the size and color mappings.
bscols(
  widths = c(2, NA),
  list(filter_select("continent", "Continent", shared_le, ~continent)),
  ggplotly(
    ggplot(shared_le, aes(gdp_per_capita, life_expectancy)) +
      geom_point(
        aes(
          size = population_historical_estimates,
          frame = year,
          ids = entity,
          color = continent
        )
      ) +
      scale_x_log10() +
      labs(x = "GDP per Capita", y = "Life Expectancy", size = "")
  )
)

Show countries in North America

# Restrict the data to the rows labelled "North America" and share that
# subset between the country filter and the plot.
le_na <- filter(le, continent == "North America")
shared_le_na <- SharedData$new(le_na)

# Same chart as the all-continent version, but colored per country and
# filtered by country instead of by continent.
bscols(
  widths = c(2, NA),
  list(filter_select("entity", "Country", shared_le_na, ~entity)),
  ggplotly(
    shared_le_na %>%
      ggplot(aes(gdp_per_capita, life_expectancy)) +
      geom_point(
        aes(
          size = population_historical_estimates,
          frame = year,
          color = entity
        )
      ) +
      scale_x_log10() +
      labs(
        x = "GDP per Capita",
        y = "Life Expectancy",
        color = "Country",
        size = ""
      )
  )
)

Better One

# Two line charts (life expectancy and GDP per capita over time, one line per
# country) driven by country and year filters on the shared data.
#
# The country filter uses the HTML id "entity_lines": crosstalk needs every
# filter id to be unique within the page, and "entity" is already taken by
# the North America country filter earlier in this document.
#
# The former `id = ~entity` argument is dropped: `id` is not a scatter trace
# attribute (plotly's attribute is `ids`, which serves animation), and
# `color = ~entity` already yields one line per country.
bscols(
  widths = c(2, 4, 6),
  list(
    filter_select("entity_lines", "Country", shared_le, ~entity),
    filter_select("year", "Years", shared_le, ~year)
  ),
  # Legend hidden on the first chart only; the second chart keeps it.
  shared_le %>%
    plot_ly(x = ~year, y = ~life_expectancy, color = ~entity) %>%
    add_lines(showlegend = FALSE),
  shared_le %>%
    plot_ly(x = ~year, y = ~gdp_per_capita, color = ~entity) %>%
    add_lines()
)

Reflection